{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <div align='center'> 学习Pandas之透视表(pivotTab)和交叉表(crossTab) </div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pylab as plt\n",
    "\n",
    "%matplotlib inline\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 一. groupby , pivot_table and crosstab\n",
    "\n",
    "[参考](https://blog.csdn.net/elecjack/article/details/50760736)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.245835</td>\n",
       "      <td>0.101619</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.661818</td>\n",
       "      <td>a</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-0.597338</td>\n",
       "      <td>b</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1.212648</td>\n",
       "      <td>-2.601815</td>\n",
       "      <td>b</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.035891</td>\n",
       "      <td>-0.869209</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      data1     data2 key1 key2\n",
       "0  1.245835  0.101619    a  one\n",
       "1 -0.785056 -0.661818    a  two\n",
       "2 -1.769749 -0.597338    b  one\n",
       "3 -1.212648 -2.601815    b  two\n",
       "4  0.035891 -0.869209    a  one"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = pd.DataFrame({\n",
    "    'key1':['a','a','b','b','a'],\n",
    "    'key2':['one','two','one','two','one'],\n",
    "    'data1':np.random.randn(5),\n",
    "    'data2':np.random.randn(5)})\n",
    "df1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. groupby"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.groupby.DataFrameGroupBy'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0    1.245835\n",
       "1   -0.785056\n",
       "4    0.035891\n",
       "Name: data1, dtype: float64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (1) groupby one column\n",
    "df1_grouped_by_key1 = df1.groupby(by='key1')\n",
    "print(type(df1_grouped_by_key1))\n",
    "# list(df1_grouped_by_key1)[0][1]\n",
    "dict(list(df1_grouped_by_key1))['a']['data1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">data1</th>\n",
       "      <th colspan=\"8\" halign=\"left\">data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>3.0</td>\n",
       "      <td>0.165556</td>\n",
       "      <td>1.021636</td>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.374583</td>\n",
       "      <td>0.035891</td>\n",
       "      <td>0.640863</td>\n",
       "      <td>1.245835</td>\n",
       "      <td>3.0</td>\n",
       "      <td>-0.476469</td>\n",
       "      <td>0.511265</td>\n",
       "      <td>-0.869209</td>\n",
       "      <td>-0.765513</td>\n",
       "      <td>-0.661818</td>\n",
       "      <td>-0.280099</td>\n",
       "      <td>0.101619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2.0</td>\n",
       "      <td>-1.491199</td>\n",
       "      <td>0.393930</td>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-1.630474</td>\n",
       "      <td>-1.491199</td>\n",
       "      <td>-1.351923</td>\n",
       "      <td>-1.212648</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-1.599576</td>\n",
       "      <td>1.417379</td>\n",
       "      <td>-2.601815</td>\n",
       "      <td>-2.100695</td>\n",
       "      <td>-1.599576</td>\n",
       "      <td>-1.098457</td>\n",
       "      <td>-0.597338</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     data1                                                              \\\n",
       "     count      mean       std       min       25%       50%       75%   \n",
       "key1                                                                     \n",
       "a      3.0  0.165556  1.021636 -0.785056 -0.374583  0.035891  0.640863   \n",
       "b      2.0 -1.491199  0.393930 -1.769749 -1.630474 -1.491199 -1.351923   \n",
       "\n",
       "               data2                                                    \\\n",
       "           max count      mean       std       min       25%       50%   \n",
       "key1                                                                     \n",
       "a     1.245835   3.0 -0.476469  0.511265 -0.869209 -0.765513 -0.661818   \n",
       "b    -1.212648   2.0 -1.599576  1.417379 -2.601815 -2.100695 -1.599576   \n",
       "\n",
       "                          \n",
       "           75%       max  \n",
       "key1                      \n",
       "a    -0.280099  0.101619  \n",
       "b    -1.098457 -0.597338  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1_grouped_by_key1.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('a',       data1     data2 key1 key2\n",
      "0  1.245835  0.101619    a  one\n",
      "1 -0.785056 -0.661818    a  two\n",
      "4  0.035891 -0.869209    a  one), ('b',       data1     data2 key1 key2\n",
      "2 -1.769749 -0.597338    b  one\n",
      "3 -1.212648 -2.601815    b  two)]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      data1  data2  key2\n",
       "key1                    \n",
       "a         3      3     3\n",
       "b         2      2     2"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print([x for x in df1_grouped_by_key1])\n",
    "df1_grouped_by_key1.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.DataFrameGroupBy object at 0x7fcc87498f60>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (2) groupby multiple columns\n",
    "df1_grouped_by_key1_key2 = df1.groupby(by=['key1', 'key2'])\n",
    "df1_grouped_by_key1_key2 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(('a', 'one'),       data1     data2 key1 key2\n",
      "0  1.245835  0.101619    a  one\n",
      "4  0.035891 -0.869209    a  one), (('a', 'two'),       data1     data2 key1 key2\n",
      "1 -0.785056 -0.661818    a  two), (('b', 'one'),       data1     data2 key1 key2\n",
      "2 -1.769749 -0.597338    b  one), (('b', 'two'),       data1     data2 key1 key2\n",
      "3 -1.212648 -2.601815    b  two)]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
       "      <th>one</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
       "      <th>one</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           data1  data2\n",
       "key1 key2              \n",
       "a    one       2      2\n",
       "     two       1      1\n",
       "b    one       1      1\n",
       "     two       1      1"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print([x for x in df1_grouped_by_key1_key2])\n",
    "df1_grouped_by_key1_key2.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "key1\n",
       "a    0.496669\n",
       "b   -2.982397\n",
       "Name: data1, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (3) agg on grouped\n",
    "df1_grouped_by_key1['data1'].agg('sum')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">data1</th>\n",
       "      <th colspan=\"2\" halign=\"left\">data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
       "      <th>one</th>\n",
       "      <td>0.640863</td>\n",
       "      <td>1.281726</td>\n",
       "      <td>-0.383795</td>\n",
       "      <td>-0.767590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.661818</td>\n",
       "      <td>-0.661818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
       "      <th>one</th>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-0.597338</td>\n",
       "      <td>-0.597338</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-1.212648</td>\n",
       "      <td>-1.212648</td>\n",
       "      <td>-2.601815</td>\n",
       "      <td>-2.601815</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              data1               data2          \n",
       "               mean       sum      mean       sum\n",
       "key1 key2                                        \n",
       "a    one   0.640863  1.281726 -0.383795 -0.767590\n",
       "     two  -0.785056 -0.785056 -0.661818 -0.661818\n",
       "b    one  -1.769749 -1.769749 -0.597338 -0.597338\n",
       "     two  -1.212648 -1.212648 -2.601815 -2.601815"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1_grouped_by_key1_key2['data1', 'data2'].agg(['mean', 'sum'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.165556</td>\n",
       "      <td>-0.476469</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-1.491199</td>\n",
       "      <td>-1.599576</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data1     data2\n",
       "key1                    \n",
       "a     0.165556 -0.476469\n",
       "b    -1.491199 -1.599576"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (4) apply on grouped\n",
    "# the distinguish between agg and apply is 'apply' can action on all 'func'\n",
    "# agg: mean, sum, count and so on, apply can work with user define func\n",
    "df1_grouped_by_key1.agg('mean')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.165556</td>\n",
       "      <td>-0.476469</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-1.491199</td>\n",
       "      <td>-1.599576</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data1     data2\n",
       "key1                    \n",
       "a     0.165556 -0.476469\n",
       "b    -1.491199 -1.599576"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sample above 'agg'\n",
    "df1_grouped_by_key1.apply(np.mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
       "      <th>one</th>\n",
       "      <td>0.640863</td>\n",
       "      <td>-0.383795</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.661818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
       "      <th>one</th>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-0.597338</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-1.212648</td>\n",
       "      <td>-2.601815</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              data1     data2\n",
       "key1 key2                    \n",
       "a    one   0.640863 -0.383795\n",
       "     two  -0.785056 -0.661818\n",
       "b    one  -1.769749 -0.597338\n",
       "     two  -1.212648 -2.601815"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1_grouped_by_key1_key2.agg('mean')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
       "      <th>one</th>\n",
       "      <td>0.640863</td>\n",
       "      <td>-0.383795</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.661818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
       "      <th>one</th>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-0.597338</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-1.212648</td>\n",
       "      <td>-2.601815</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              data1     data2\n",
       "key1 key2                    \n",
       "a    one   0.640863 -0.383795\n",
       "     two  -0.785056 -0.661818\n",
       "b    one  -1.769749 -0.597338\n",
       "     two  -1.212648 -2.601815"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sample above 'agg'\n",
    "df1_grouped_by_key1_key2.apply(np.mean)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. pivot_table & pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.245835</td>\n",
       "      <td>0.101619</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.661818</td>\n",
       "      <td>a</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-0.597338</td>\n",
       "      <td>b</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1.212648</td>\n",
       "      <td>-2.601815</td>\n",
       "      <td>b</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.035891</td>\n",
       "      <td>-0.869209</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      data1     data2 key1 key2\n",
       "0  1.245835  0.101619    a  one\n",
       "1 -0.785056 -0.661818    a  two\n",
       "2 -1.769749 -0.597338    b  one\n",
       "3 -1.212648 -2.601815    b  two\n",
       "4  0.035891 -0.869209    a  one"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">data1</th>\n",
       "      <th colspan=\"2\" halign=\"left\">data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key2</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.640863</td>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.383795</td>\n",
       "      <td>-0.661818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-1.212648</td>\n",
       "      <td>-0.597338</td>\n",
       "      <td>-2.601815</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data1               data2          \n",
       "key2       one       two       one       two\n",
       "key1                                        \n",
       "a     0.640863 -0.785056 -0.383795 -0.661818\n",
       "b    -1.769749 -1.212648 -0.597338 -2.601815"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (1) Stat. by group row and column\n",
    "df1.pivot_table(index='key1', columns='key2')\n",
    "# df1.pivot_table(index='key1', columns='key2', values='data1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">data1</th>\n",
       "      <th colspan=\"3\" halign=\"left\">data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key2</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>All</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.640863</td>\n",
       "      <td>-0.785056</td>\n",
       "      <td>0.165556</td>\n",
       "      <td>-0.383795</td>\n",
       "      <td>-0.661818</td>\n",
       "      <td>-0.476469</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-1.212648</td>\n",
       "      <td>-1.491199</td>\n",
       "      <td>-0.597338</td>\n",
       "      <td>-2.601815</td>\n",
       "      <td>-1.599576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <td>-0.162674</td>\n",
       "      <td>-0.998852</td>\n",
       "      <td>-0.497146</td>\n",
       "      <td>-0.454976</td>\n",
       "      <td>-1.631816</td>\n",
       "      <td>-0.925712</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data1                         data2                    \n",
       "key2       one       two       All       one       two       All\n",
       "key1                                                            \n",
       "a     0.640863 -0.785056  0.165556 -0.383795 -0.661818 -0.476469\n",
       "b    -1.769749 -1.212648 -1.491199 -0.597338 -2.601815 -1.599576\n",
       "All  -0.162674 -0.998852 -0.497146 -0.454976 -1.631816 -0.925712"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (2) Stat. sumary\n",
    "df1.pivot_table(index='key1', columns='key2', margins=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item</th>\n",
       "      <th>CType</th>\n",
       "      <th>USD</th>\n",
       "      <th>EU</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Item0</td>\n",
       "      <td>Gold</td>\n",
       "      <td>1$</td>\n",
       "      <td>1€</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Item0</td>\n",
       "      <td>Bronze</td>\n",
       "      <td>2$</td>\n",
       "      <td>2€</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Item1</td>\n",
       "      <td>Gold</td>\n",
       "      <td>3$</td>\n",
       "      <td>3€</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Item1</td>\n",
       "      <td>Silver</td>\n",
       "      <td>4$</td>\n",
       "      <td>4€</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Item   CType USD  EU\n",
       "0  Item0    Gold  1$  1€\n",
       "1  Item0  Bronze  2$  2€\n",
       "2  Item1    Gold  3$  3€\n",
       "3  Item1  Silver  4$  4€"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (3) distinguish: pivot for reshape, row <-> column\n",
    "# https://nikgrozev.com/2015/07/01/reshaping-in-pandas-pivot-pivot-table-stack-and-unstack-explained-with-pictures/\n",
    "# Index contains duplicate entries, cannot reshape\n",
    "# df1.pivot(index='key1', columns='key2')\n",
    "\n",
    "# if change Item[2]: Item1 --> Item0, will error: Index contains duplicate entries, cannot reshape\n",
    "# so pivot_table can solve this problem\n",
    "\n",
    "from collections import OrderedDict\n",
    "table = OrderedDict((\n",
    "    (\"Item\", ['Item0', 'Item0', 'Item1', 'Item1']),\n",
    "    ('CType',['Gold', 'Bronze', 'Gold', 'Silver']),\n",
    "    ('USD',  ['1$', '2$', '3$', '4$']),\n",
    "    ('EU',   ['1€', '2€', '3€', '4€'])\n",
    "))\n",
    "df2 = pd.DataFrame(table)\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>USD</th>\n",
       "      <th>EU</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item</th>\n",
       "      <th>CType</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Item0</th>\n",
       "      <th>Bronze</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Gold</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Item1</th>\n",
       "      <th>Gold</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Silver</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              USD  EU\n",
       "Item  CType          \n",
       "Item0 Bronze    1   1\n",
       "      Gold      1   1\n",
       "Item1 Gold      1   1\n",
       "      Silver    1   1"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.groupby(by=['Item', 'CType']).count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>CType</th>\n",
       "      <th>Bronze</th>\n",
       "      <th>Gold</th>\n",
       "      <th>Silver</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Item0</th>\n",
       "      <td>2$</td>\n",
       "      <td>1$</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item1</th>\n",
       "      <td>None</td>\n",
       "      <td>3$</td>\n",
       "      <td>4$</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "CType Bronze Gold Silver\n",
       "Item                    \n",
       "Item0     2$   1$   None\n",
       "Item1   None   3$     4$"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2_piv1_item = df2.pivot(index='Item', columns='CType', values='USD')\n",
    "df2_piv1_item"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">USD</th>\n",
       "      <th colspan=\"3\" halign=\"left\">EU</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CType</th>\n",
       "      <th>Bronze</th>\n",
       "      <th>Gold</th>\n",
       "      <th>Silver</th>\n",
       "      <th>Bronze</th>\n",
       "      <th>Gold</th>\n",
       "      <th>Silver</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Item0</th>\n",
       "      <td>2$</td>\n",
       "      <td>1$</td>\n",
       "      <td>None</td>\n",
       "      <td>2€</td>\n",
       "      <td>1€</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item1</th>\n",
       "      <td>None</td>\n",
       "      <td>3$</td>\n",
       "      <td>4$</td>\n",
       "      <td>None</td>\n",
       "      <td>3€</td>\n",
       "      <td>4€</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         USD                 EU            \n",
       "CType Bronze Gold Silver Bronze Gold Silver\n",
       "Item                                       \n",
       "Item0     2$   1$   None     2€   1€   None\n",
       "Item1   None   3$     4$   None   3€     4€"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2_piv2_item = df2.pivot(index='Item', columns='CType')\n",
    "df2_piv2_item"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item</th>\n",
       "      <th>CType</th>\n",
       "      <th>USD</th>\n",
       "      <th>EU</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Item0</td>\n",
       "      <td>Gold</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Item0</td>\n",
       "      <td>Bronze</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Item0</td>\n",
       "      <td>Gold</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Item1</td>\n",
       "      <td>Silver</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Item   CType USD EU\n",
       "0  Item0    Gold   1  1\n",
       "1  Item0  Bronze   2  2\n",
       "2  Item0    Gold   3  3\n",
       "3  Item1  Silver   4  4"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# make error test, change Item1 -> Item0\n",
    "table = OrderedDict((\n",
    "    (\"Item\", ['Item0', 'Item0', 'Item0', 'Item1']),\n",
    "    ('CType',['Gold', 'Bronze', 'Gold', 'Silver']),\n",
    "    ('USD',  ['1', '2', '3', '4']),\n",
    "    ('EU',   ['1', '2', '3', '4'])\n",
    "))\n",
    "\n",
    "df2_2 = pd.DataFrame(table)\n",
    "df2_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>USD</th>\n",
       "      <th>EU</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item</th>\n",
       "      <th>CType</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Item0</th>\n",
       "      <th>Bronze</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Gold</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item1</th>\n",
       "      <th>Silver</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              USD  EU\n",
       "Item  CType          \n",
       "Item0 Bronze    1   1\n",
       "      Gold      2   2\n",
       "Item1 Silver    1   1"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Item0 and Gold --> 2, 2\n",
    "df2_2.groupby(by=['Item', 'CType']).count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">EU</th>\n",
       "      <th colspan=\"3\" halign=\"left\">USD</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CType</th>\n",
       "      <th>Bronze</th>\n",
       "      <th>Gold</th>\n",
       "      <th>Silver</th>\n",
       "      <th>Bronze</th>\n",
       "      <th>Gold</th>\n",
       "      <th>Silver</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Item0</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>None</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item1</th>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>4</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          EU                 USD             \n",
       "CType Bronze  Gold Silver Bronze  Gold Silver\n",
       "Item                                         \n",
       "Item0      2     3   None      2     3   None\n",
       "Item1   None  None      4   None  None      4"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Error: Index contains duplicate entries, cannot reshape\n",
    "# df2_2_piv2_item = df2_2.pivot(index='Item', columns='CType')\n",
    "# df2_2_piv2_item\n",
    "\n",
    "# using pivot_table instead, aggregate these duplicate entries, must assign the aggfunc\n",
    "# Error: No numeric types to aggregate if aggfunc=np.mean, why ?\n",
    "df2_2_piv_item = df2_2.pivot_table(index='Item', columns='CType', aggfunc=np.max)\n",
    "df2_2_piv_item"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. crosstab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>key2</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "key2  one  two  All\n",
       "key1               \n",
       "a       2    1    3\n",
       "b       1    1    2\n",
       "All     3    2    5"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (1) cross tab is-a special pivot table that calculate the frequences of grouped \n",
    "pd.crosstab(index=df1.key1, columns=df1.key2, margins=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">data1</th>\n",
       "      <th colspan=\"3\" halign=\"left\">data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key2</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>All</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     data1           data2          \n",
       "key2   one  two  All   one  two  All\n",
       "key1                                \n",
       "a      2.0  1.0  3.0   2.0  1.0  3.0\n",
       "b      1.0  1.0  2.0   1.0  1.0  2.0\n",
       "All    3.0  2.0  5.0   3.0  2.0  5.0"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.pivot_table(index=['key1'], columns=['key2'], margins=True, aggfunc=len)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>key2</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.035891</td>\n",
       "      <td>-0.785056</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-1.212648</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "key2       one       two\n",
       "key1                    \n",
       "a     0.035891 -0.785056\n",
       "b    -1.769749 -1.212648"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (2)\n",
    "pd.crosstab(index=df1.key1, columns=df1.key2, values=df1.data1, aggfunc=np.min)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 二. 过滤\n",
    "\n",
    "[参考](https://www.jianshu.com/p/26355cf3ea9b)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. loc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.245835</td>\n",
       "      <td>0.101619</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.661818</td>\n",
       "      <td>a</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1.769749</td>\n",
       "      <td>-0.597338</td>\n",
       "      <td>b</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1.212648</td>\n",
       "      <td>-2.601815</td>\n",
       "      <td>b</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.035891</td>\n",
       "      <td>-0.869209</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      data1     data2 key1 key2\n",
       "0  1.245835  0.101619    a  one\n",
       "1 -0.785056 -0.661818    a  two\n",
       "2 -1.769749 -0.597338    b  one\n",
       "3 -1.212648 -2.601815    b  two\n",
       "4  0.035891 -0.869209    a  one"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.245835</td>\n",
       "      <td>0.101619</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.785056</td>\n",
       "      <td>-0.661818</td>\n",
       "      <td>a</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.035891</td>\n",
       "      <td>-0.869209</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      data1     data2 key1 key2\n",
       "0  1.245835  0.101619    a  one\n",
       "1 -0.785056 -0.661818    a  two\n",
       "4  0.035891 -0.869209    a  one"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.loc[df1.key1=='a']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.4/dist-packages/pandas/core/computation/check.py:17: UserWarning: The installed version of numexpr 2.2.2 is not supported in pandas and will be not be used\n",
      "The minimum supported version is 2.4.6\n",
      "\n",
      "  ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [data1, data2, key1, key2]\n",
       "Index: []"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.query('data1 > -0.4 & data1 < -0.1')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
